# prevent library(sqldf) from triggering a tcl/tk dependency which causes R to exit on OS X if X11 isn’t installed. See https://code.google.com/p/sqldf/ for troubleshooting details.
options(gsubfn.engine = "R")
library('sqldf')
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
library('ggplot2')
library('zoo')
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library('reshape2')
# days : increasing sequence of integers
# k : positive number
# return data frame days,prev where prev=max day<=prev-k
lagDays <- function(days,k) {
prevI <- 1
dayVec <- numeric(length(days))
prevVec <- numeric(length(days))
i <- 0
for(curI in seq(2,length(days))) {
while((prevI<curI-1)&&(days[prevI+1]<=days[curI]-k)) {
prevI <- prevI + 1
}
if(days[prevI]<=days[curI]-k) {
i <- i+1
dayVec[[i]] <- days[curI]
prevVec[[i]] <- days[prevI]
}
}
data.frame(days=dayVec[seq_len(i)],prev=prevVec[seq_len(i)],
stringsAsFactors = FALSE)
}
compReturn <- function(Investment,k,colName) {
lags <- lagDays(Investment$dayCount,k)
rets <- sqldf('
SELECT
sp1.dayCount dayCount,
sp1.dayCount-sp2.dayCount width,
(sp1.Close - sp2.Close)/sp2.Close Return
FROM
lags
INNER JOIN
Investment sp1
ON
sp1.dayCount=lags.days
INNER JOIN
Investment sp2
ON
sp2.dayCount=lags.prev
')
rets[[colName]] <- ((1+rets$Return)^(k/rets$width)-1)
rets <- rets[,c('dayCount',colName),drop=FALSE]
r <- merge(Investment,rets,by='dayCount')
r
}
SharpeRatio <- function(Investment,column,window) {
means <- rollapply(Investment[[column]],width=window,FUN=mean)
means <- c(rep(NA,nrow(Investment)-length(means)),means)
sds <- rollapply(Investment[[column]],width=window,FUN=sd)
sds <- c(rep(NA,nrow(Investment)-length(sds)),sds)
sr <- means/sds
Investment[[paste('Mean',column,sep='_')]] <- means
Investment[[paste('StdDev',column,sep='_')]] <- sds
Investment[[paste('SharpeRatio',column,sep='_')]] <- sr
Investment
}
daysInYear <- 365.242
daysInMonth <- daysInYear/12
dayMap <- list('DailyReturn'=1,
'TenDayReturn'=10,
'MonthlyReturn'=daysInMonth,
'AnnualReturn'=daysInYear)
refName='TNX.csv'
for(instName in c('SP.csv','TENZ.csv')) {
print('********************************************************')
print(paste(refName,'vs',instName))
# comparison safe invesement: 10-year treasury
refInstrument <- read.table(refName,
header=TRUE,
sep=',',
stringsAsFactors=FALSE)
# confirm assumption length(unique(refInstrument$Date))==nrow(refInstrument)
if(length(unique(refInstrument$Date))!=nrow(refInstrument)) {
stop("duplicate day")
}
refInstrument$RefAnnualReturn <- refInstrument$Close/100
refInstrument <- refInstrument[,c('Date','RefAnnualReturn'),
drop=FALSE]
for(nm in names(dayMap)) {
if(nm!='AnnualReturn') {
refInstrument[[paste('Ref',nm,sep='')]] <-
(1+refInstrument$RefAnnualReturn)^(dayMap[[nm]]/daysInYear) - 1
}
}
# Investment we are analyzing
Investment <- read.table(instName,
header=TRUE,
sep=',',
stringsAsFactors=FALSE)
# confirm assumption length(unique(Investment$Date))==nrow(Investment)
if(length(unique(Investment$Date))!=nrow(Investment)) {
stop("duplicate day")
}
Investment <- Investment[,c('Date','Close')]
# restrict down to days we have both infos and merge refInstrument into Investment
commonDays <- intersect(refInstrument$Date,Investment$Date)
refInstrument <- refInstrument[refInstrument$Date %in% commonDays,,drop=FALSE]
Investment <- Investment[Investment$Date %in% commonDays,,drop=FALSE]
Investment <- merge(Investment,refInstrument,by='Date')
# number the days
day <- as.POSIXct(Investment$Date,tz="UTC",format="%Y-%m-%d")
baseDay <- min(day)
Investment$dayCount <- as.numeric(difftime(day,baseDay,units='days'))
# confirm assumption length(unique(refInstrument$dayCount))==nrow(refInstrument)
if(length(unique(Investment$dayCount))!=nrow(Investment)) {
stop("duplicate day")
}
Investment$day <- day
# get Investment into dates moving forward order
Investment <- Investment[order(Investment$dayCount),,drop=FALSE]
for(icol in names(dayMap)) {
print('*****************************************')
window <- min(500,max(10,round(3*dayMap[[icol]])))
stepName <- paste(instName,'vs',refName,icol)
print(stepName)
Investment <- compReturn(Investment,dayMap[[icol]],icol)
col <- paste('D',icol,sep='')
# get the Difference in returns or excess return
Investment[[col]] <- Investment[[icol]] - Investment[[paste('Ref',icol,sep='')]]
print(ggplot(Investment[!is.na(Investment[[col]]),,drop=FALSE],
aes_string(x='day',y=col)) +
geom_point() + geom_smooth() +
ggtitle(stepName))
ncol <- paste('SharpeRatio',col,sep='_')
Investment <- SharpeRatio(Investment,col,window)
meanCol <- paste('Mean',col,sep='_')
stdCol <- paste('StdDev',col,sep='_')
msPlotFrame <- melt(Investment[!is.na(Investment[[stdCol]]),
c('day',meanCol,stdCol),drop=FALSE],
id='day',
value.name='return')
print(ggplot(msPlotFrame,
aes_string(x='day',y='return',
color='variable',linetype='variable')) +
geom_line() +
scale_color_manual(values=c("green", "red")) +
ggtitle(paste(stepName,'\nreturn mean/stdev window=',window,'days')) +
theme(legend.position="bottom"))
print(summary(Investment[[ncol]]))
print(ggplot(Investment[!is.na(Investment[[ncol]]),,drop=FALSE],
aes_string(x='day',y=ncol)) +
geom_point() +
ggtitle(paste(stepName,'\nSharpe ratio window=',window,'days')))
print('*****************************************')
}
print('********************************************************')
}
## [1] "********************************************************"
## [1] "TNX.csv vs SP.csv"
## [1] "*****************************************"
## [1] "SP.csv vs TNX.csv DailyReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.


## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.70000 -0.18700 0.05933 0.05265 0.29580 1.89400 9

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "SP.csv vs TNX.csv TenDayReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.


## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -2.77300 -0.29330 0.07500 0.08287 0.44890 2.65600 29

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "SP.csv vs TNX.csv MonthlyReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.


## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.59400 -0.28750 0.09392 0.12310 0.48180 2.71300 90

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "SP.csv vs TNX.csv AnnualReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.


## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -3.7610 -0.3359 0.1680 0.3037 0.9634 3.4340 499

## [1] "*****************************************"
## [1] "********************************************************"
## [1] "********************************************************"
## [1] "TNX.csv vs TENZ.csv"
## [1] "*****************************************"
## [1] "TENZ.csv vs TNX.csv DailyReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.


## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -88.34000 -0.21080 0.00586 -0.09961 0.21600 0.96070 9

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "TENZ.csv vs TNX.csv TenDayReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.


## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -2.17800 -0.34140 0.01509 0.01901 0.35850 2.08800 29

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "TENZ.csv vs TNX.csv MonthlyReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.


## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.54000 -0.28400 0.01201 0.05131 0.37610 1.97000 90

## [1] "*****************************************"
## [1] "*****************************************"
## [1] "TENZ.csv vs TNX.csv AnnualReturn"
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.


## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.0400 -0.7466 -0.1724 -0.1326 0.6514 0.7719 499

## [1] "*****************************************"
## [1] "********************************************************"